#!/usr/bin/ruby
# $Id$
require 'rexml/document'

# Parser options handed to REXML::Document.new when the antiword output is
# read; :all asks REXML to drop whitespace-only text nodes in every element.
$context = { ignore_whitespace_nodes: :all }

# Maps a heading container to its outline depth and to the element name of
# the containers nested directly inside it.
#
# h:: any object responding to +name+ (and +inspect+, used for diagnostics).
#
# Returns a two-element array [level, next_name]:
#   chapter      -> [0, 'sect1']
#   sect1..sect4 -> [N, 'sect<N+1>']
#   sect5        -> [5, 'para']
# Any other name is written to $stderr via +inspect+ and yields [nil, nil].
def levelAndNext(h)
  case h.name
  when 'chapter'
    [0, 'sect1']
  when /\Asect([1-4])\z/
    # Anchored on both ends so that only exactly sect1..sect4 match; names
    # that merely contain such a substring (e.g. "refsect1", "sect12") must
    # fall through to the diagnostic branch instead of being given a level.
    level = Regexp.last_match(1).to_i
    [level, "sect#{level + 1}"]
  when 'sect5'
    [5, 'para']
  else
    $stderr.puts(h.inspect)
    [nil, nil]
  end
end

# Writes one paragraph line to +io+: +level+ tab characters, the marker
# "| ", the value of +p.text+ with surrounding whitespace stripped, and a
# newline.  When +p.text+ is nil the body after the marker is empty.
def doPara(p, level, io)
  raw = p.text
  indent = "\t" * level
  body = ''
  body = raw.strip if raw
  io.print(indent, '| ', body, "\n")
end

# Collects the text found anywhere beneath +parent+, in document order.
#
# Every child node of +parent+ is visited: a text node contributes its
# +value+, an element contributes the deep text of its own subtree, and any
# other kind of node (a comment, for instance) is skipped.  The pieces are
# joined with single spaces; no stripping is done here.
#
# parent:: node responding to +each_child+ whose children respond to
#          +node_type+ (as REXML elements and text nodes do).
#
# Returns a String, '' when +parent+ has no children.
def deepTextOfElement(parent)
  pieces = []
  # Walk child *nodes* rather than child elements: this keeps text that sits
  # directly inside +parent+ (a plain <title>Heading</title>) as well as text
  # that follows a nested element, both of which an element-only walk drops.
  parent.each_child do |node|
    case node.node_type
    when :text
      pieces.push(node.value.to_s)
    when :element
      pieces.push(deepTextOfElement(node))
    end
  end
  pieces.join(' ')
end

# Writes one heading line to +io+: +level+ tab characters, the deep text of
# +t+ (as returned by deepTextOfElement) with surrounding whitespace
# stripped, and a newline.  A nil deep text is written as an empty heading.
def doTitle(t, level, io)
  found = deepTextOfElement(t)
  indent = "\t" * level
  heading = ''
  heading = found.strip if found
  io.print(indent, heading, "\n")
end

# Renders one heading container +h+ and everything nested in it to +io+.
#
# The container's depth and the element name of its nested containers are
# obtained from levelAndNext.  Each child element is then dispatched by name:
#   title                 -> doTitle at this container's depth
#   para                  -> doPara at this container's depth
#   the nested-container  -> doHead, recursively
#   anything else         -> dumped to $stderr with +inspect+
def doHead(h, io)
  depth, child_tag = levelAndNext(h)
  h.elements.each do |node|
    tag = node.name
    if tag == 'title'
      doTitle(node, depth, io)
    elsif tag == 'para'
      # Checked before the nested-container name, so a container whose
      # nested name is itself 'para' still renders paragraphs here.
      doPara(node, depth, io)
    elsif child_tag == tag
      doHead(node, io)
    else
      $stderr.puts(node.inspect)
    end
  end
end

# book/title
# book/bookinfo (title, author, date, corpname)
# book/chapter/title Heading 1 {emphasis?}
# | book/chapter/para Text 1
#     book/chapter/sect1/title Heading 2
#     | book/chapter/sect1/para Text 2
#       book/chapter/sect1/sect2/title Heading 3
#       | book/chapter/sect1/sect2/para Text 3
#
#       ... through sect5 (Heading 6, indent level 5)
#
# chapter includes sect1
#   sectN includes sectN+1
#
# Run antiword on the files named on the command line and render the first
# <chapter> of its XML output as an outline on $stdout.
#
# The command is given to IO.popen as an argument array, so no shell is
# involved: file names containing quotes, `$` or backticks reach antiword
# unchanged instead of being re-interpreted (or executed) by the shell.
IO.popen(['antiword', '-x', 'db', *ARGV]) do |io|
  xml = REXML::Document.new(io, $context)
  root = xml.root
  chapter = root && root.elements['chapter']
  # Without this check an empty or chapter-less document surfaces as an
  # unrelated NoMethodError on nil further down.
  abort('no <chapter> element found in antiword output') unless chapter
  doHead(chapter, $stdout)
end
